
CHEATSHEET 



( Python and R Codes) 



Supervised Learning 

• Decision Tree • Random Forest 
• kNN • Logistic Regression 


Unsupervised Learning 

• Apriori algorithm • k-means 

• Hierarchical Clustering 


Reinforcement Learning 

• Markov Decision Process 

• Q Learning 



Python 


R 

Code 


Code 



#Import Library 

#Import other necessary libraries like pandas, 
#numpy. . . 

from sklearn import linear_model 

#Load Train and Test datasets 

#Identify feature and response variable(s) and 

#values must be numeric and numpy arrays 

x_train=input_variables_values_training_datasets 

y_train=target_variables_values_training_datasets 

x_test=input_variables_values_test_datasets 

#Create linear regression object 

linear = linear_model.LinearRegression() 

#Train the model using the training sets and 
#check score 

linear.fit(x_train, y_train) 
linear.score(x_train, y_train) 

#Equation coefficient and Intercept 
print (' Coefficient : \n', linear .coef_) 
print (' Intercept : \n', linear . intercept_) 

#Predict Output 

predicted= linear . predict (x_test) 


#Load Train and Test datasets 

#Identify feature and response variable(s) and 

#values must be numeric and numpy arrays 

x_train <- input_variables_values_training_datasets 

y_train <- target_variables_values_training_datasets 

x_test <- input_variables_values_test_datasets 

x <- cbind(x_train, y_train) 

#Train the model using the training sets and 
#check score 

linear <- lm(y_train ~ . , data = x) 
summary (linear) 

#Predict Output 

predicted= predict(linear, x_test) 



#Import Library 

from sklearn.linear_model import LogisticRegression 
#Assumed you have, X (predictor) and Y (target) 

#for training data set and x_test (predictor) 

#of test_dataset 

#Create logistic regression object 

model = LogisticRegression() 

#Train the model using the training sets 

#and check score 

model. fit(X, y) 

model. score(X, y) 

#Equation coefficient and Intercept 
print (' Coefficient : \n', model.coef_) 
print (' Intercept : \n ' , model . intercept_) 

#Predict Output 

predicted= model . predict (x_test) 


x <- cbind(x_train,y_train) 

#Train the model using the training sets and check 
#score 

logistic <- glm(y_train ~ ., data = x, family='binomial') 
summary (logistic) 

#Predict Output 

predicted= predict(logistic, x_test) 



#Import Library 

#Import other necessary libraries like pandas, numpy. 
from sklearn import tree 

#Assumed you have, X (predictor) and Y (target) for 
#training data set and x_test (predictor) of 
#test_dataset 
#Create tree object 

model = tree.DecisionTreeClassifier(criterion='gini') 
#for classification, here you can change the 
#algorithm as gini or entropy (information gain) by 
#default it is gini 

#model = tree.DecisionTreeRegressor() for 
#regression 

#Train the model using the training sets and check 
#score 

model. fit(X, y) 
model . score(X, y) 

#Predict Output 

predicted= model.predict(x_test) 


#Import Library 
library(rpart ) 
x <- cbind(x_train,y_train) 

#grow tree 

fit <- rpart (y_train ~ ., data = x,method="class") 
summary (fit) 

#Predict Output 

predicted= predict(fit,x_test) 



#Import Library 

#Import Library 

from sklearn import svm 

library(e1071) 

#Assumed you have, X (predictor) and Y (target) for 

x <- cbind(x_train,y_train) 

#training data set and x_test (predictor) of test_dataset 

#Fitting model 

#Create SVM classification object 

fit <-svm(y_train ~ ., data = x) 

model = svm.SVC() 

summary(fit) 

#there are various options associated 

#Predict Output 

#with it, this is simple for classification. 

#Train the model using the training sets and check 

#score 

model. fit(X, y) 

model. score(X, y) 

#Predict Output 

predicted= model. predict(x_test) 

predicted= predict(fit,x_test) 



#Import Library 

#Import Library 

</> 

o 

from sklearn . naive_bayes import GaussianNB 

library(e1071) 

#Assumed you have, X (predictor) and Y (target) for 

x <- cbind(x_train,y_train) 

CQ 

#training data set and x_test (predictor) of test_dataset 

#Fitting model 

#Create Gaussian Naive Bayes classification object 
model = GaussianNB() 

fit <-naiveBayes(y_train ~ ., data = x) 

#there is other distribution for multinomial classes 

summary(fit ) 

O 

W 

#like Bernoulli Naive Bayes 

#Predict Output 

#Train the model using the training sets and check 

#score 

model. fit(X, y) 

#Predict Output 

predicted= model.predict(x_test) 

predicted= predict(fit,x_test) 


"3T 

#Import Library 

#Import Library 

o 

from sklearn. neighbors import KNeighborsClassifier 

library(knn) 

h 

#Assumed you have, X (predictor) and Y (target) for 

x <- cbind(x_train,y_train) 

o 

#training data set and x_test (predictor) of test_dataset 

#Fitting model 

2 

#Create KNeighbors classifier object model 

fit <-knn(y_train ~ ., data = x,k=5) 

to 

model = KNeighborsClassifier(n_neighbors=6) 

summary(fit) 

o 



#Predict Output 

cz 

#default value for n_neighbors is 5 

o 


predicted= predict(fit ,x_test) 

2 

#Train the model using the training sets and check score 


1 

model. fit(X, y) 



#Predict Output 


I 

predicted= model.predict(x_test) 




#Import Library 

#Import Library 


from sklearn. cluster import KMeans 

library(cluster) 

</) 

#Assumed you have, X (attributes) for training data set 

fit <- kmeans(X, 3) 

#and x_test (attributes) of test_dataset 

#3 cluster solution 

CO 

0) 

#Create KMeans clustering object model 


s 

model = KMeans(n_clusters=3, random_state=0) 


1 

#Train the model using the training sets and check score 

model. fit(X) 

#Predict Output 

predicted= model.predict(x_test) 




#Import Library 

#Import Library 

to 

from sklearn.ensemble import RandomForestClassifier 

library (randomForest) 

<X> 

#Assumed you have, X (predictor) and Y (target) for 

x <- cbind(x_train,y_train) 

o 

LL 

#training data set and x_test (predictor) of test_dataset 

#Fitting model 

= 

#Create Random Forest object 

fit <- randomForest(y_train ~ ., x, ntree=500) 

E 

o 

model= RandomForestClassifier() 

summary(fit) 


#Train the model using the training sets and check score 

#Predict Output 

CO 

cc 

model. fit(X, y) 

#Predict Output 

predicted= model.predict(x_test) 

predicted= predict (fit, x_test) 


CO 



E 

#Import Library 

#Import Library 

o 

from sklearn import decomposition 

library(stats) 

C) 

#Assumed you have training and test data set as train and 

pca <- princomp(train, cor = TRUE) 


#test 

train_reduced <- predict(pca, train) 

o 
■ ■■■ 

#Create PCA object 
pca = decomposition.PCA(n_components=k) 

test_reduced <- predict(pca, test) 

o 

#default value of k =min(n_sample, n_features) 


^3 



o 

cc 

#For Factor analysis 



#fa= decomposition. FactorAnalysis() 


75 

#Reduced the dimension of training dataset using PCA 


o 

train_reduced = pca.fit_transform(train) 


(/) 

#Reduced the dimension of test dataset 


Q) 

E 

test_reduced = pca.transform(test) 


■ 





#Import Library 

from sklearn. ensemble import GradientBoostingClassifier 
#Assumed you have, X (predictor) and Y (target) for 
#training data set and x_test (predictor) of test_dataset 
#Create Gradient Boosting Classifier object 
model= GradientBoostingClassifier(n_estimators=100, \
learning_rate=1.0, max_depth=1, random_state=0) 
#Train the model using the training sets and check score 
model. fit(X, y) 

#Predict Output 

predicted= model. predict(x_test) 


#Import Library 

library(caret) 

x <- cbind(x_train,y_train) 

#Fitting model 

fitControl <- trainControl(method = "repeatedcv", 
                           number = 4, repeats = 4) 

fit <- train(y_train ~ ., data = x, method = "gbm", 
             trControl = fitControl, verbose = FALSE) 
predicted= predict(fit,x_test, type= "prob") [, 2] 


To view complete guide on Machine Learning Algorithms, visit here : 

http://bit.Iv/1 DOUS8N a/SS 


www.analyticsvidhya.com 















